% Main Script: run_mc_R02_DGP01.m
%   Description:
%       This script produces the results of a Monte Carlo simulation, where the DGP is set as a
%       simple static factor model and number of factors being 2, with various settings for the
%       number of units (N) and the number of time periods (T)
%   Output:
%       Table 2: Simulation results for the experiment in Section 5.1, N = 100, T = 50, $R = 2
%                (bias, std, rmse)
%       Table 3: Simulation results for the experiment in Section 5.1, N = 100, T = 50, $R = 2
%                (size, length, length*)

% ========================================
% Setting simulation environment and parameters
% ========================================
clear all;
% Randomization seed 
rng(12345)
% Number of cores being used in the parallel computation
parpool('local', 8)
% Number of replications
params.reps = 5000;
% True parameter
params.beta0 = 0;
% Number of factors
params.R0 = 2;
% Save file directory
params.folder_name = 'matlogs/R02_DGP01';
% Number of units and number of time periods
arr_N = 100;
arr_T = 50;
% Strength of factors
arr_kappa_1 = [0:0.05:0.30 0.40 0.50 1.0]';
arr_kappa_2 = [0:0.05:0.30 0.40 0.50 1.0]';

% ========================================
% Loop over all different combinations of N, T, and kappa
% ========================================
% Prepare arrays for outputs
res = cell(length(arr_N),length(arr_T),length(arr_kappa_1),length(arr_kappa_2));
n_cases = length(arr_N)*length(arr_T)*length(arr_kappa_1)*(length(arr_kappa_1)+1)/2;
i_case = 0;
for i_N = 1:length(arr_N)
    for i_T = 1:length(arr_T)
        for i_kappa_1 = 1:size(arr_kappa_1,1)
            for i_kappa_2 = 1:i_kappa_1
                ticID_this_design = tic;
                params.N = arr_N(i_N);
                params.T = arr_T(i_T);
                params.kappa = [arr_kappa_1(i_kappa_1), arr_kappa_2(i_kappa_2)];
                % Run the Monte Carlo simulation for the current parameters
                out = mc_weak_factors(params);
                res{i_N, i_T, i_kappa_1, i_kappa_2}.params = params;
                res{i_N, i_T, i_kappa_1, i_kappa_2}.LS.stats = out.LS.stats;
                res{i_N, i_T, i_kappa_1, i_kappa_2}.H.stats = out.H.stats;
                i_case = i_case + 1;
                fprintf(['Design %i/%i (N,T,kappa)=(%i, %i,' repmat(' %1.2f', 1, length(params.kappa)) ') took %1.2fs\n'], ...
                    i_case, n_cases, params.N, params.T, params.kappa, toc(ticID_this_design));
            end
        end
    end
end

% ========================================
% Construct oracle confidence intervals
% ========================================
% These oracle CIs are based on unknown design-specific least favorable critical values
LF_cv_LS = cell(size(res,1), size(res,2));
LF_cv = cell(size(res,1), size(res,2));
for i_N = 1:size(res,1)
    for i_T = 1:size(res,2)
        cnt = 0;
        for i_kappa_1 = 1:size(res,3)
            for i_kappa_2 = 1:size(res,4)
                res_tmp = res{i_N, i_T, i_kappa_1, i_kappa_2};
                if ~isempty(res_tmp)
                    cnt = cnt + 1;
                    cv_LS_tmp(:,:,:,cnt) = res_tmp.LS.stats.cv;
                    cv_tmp(:,:,cnt) = res_tmp.H.stats.cv;
                end
            end
        end
        % Maximize over kappa dimension
        LF_cv_LS{i_N, i_T} = max(cv_LS_tmp, [], 4);
        LF_cv{i_N, i_T} = max(cv_tmp, [], 3);
    end   
end
% Convert results to arrays
for i = 1:numel(LF_cv_LS)
    LF_cv_LS_arr(:,:,:,i) = LF_cv_LS{i}; 
    LF_cv_arr(:,:,i) = LF_cv{i};
end
% Maximum across (N, T)
LF_cv_LS_max = max(LF_cv_LS_arr, [], 4);
LF_cv_max = max(LF_cv_arr, [], 3);

% ========================================
% Save the results
% ========================================
save(sprintf('%s/MC_merged_w_LFCV',params.folder_name),'res','LF_cv_LS','LF_cv_LS_max','LF_cv','LF_cv_max')
% Close parallel computation
if isunix
    delete(gcp('nocreate')); %cleanup
end

% ========================================
% Output Table 2 & 3 as CSV files
% ========================================
if ~isfolder('tables')
    mkdir('tables')
end
% File list
folder_name = 'R02_DGP01';
fileinfo = dir(['matlogs/' folder_name '/*.mat']);
f_names = {fileinfo.name};
f_name_w_LFCV = f_names(contains(f_names,'w_LFCV'));
load(['matlogs/' folder_name '/' f_name_w_LFCV{:}]);
% Column names and row names for output table
rowLabels = {};
for i_kappa_1=1:length(arr_kappa_1)
    rowLabels{i_kappa_1} = sprintf('%1.2f',arr_kappa_1(i_kappa_1));
end
columnLabels = {};
for i_kappa_2=1:length(arr_kappa_2)
    columnLabels{i_kappa_2} = sprintf('%1.2f',arr_kappa_2(i_kappa_2));
end
% List of statistics
statsList = {'bias', 'std', 'rmse', 'size', 'len', 'LFCV'};
% Loop over all data
for i_N=1:length(arr_N)
    N=arr_N(i_N);
    for i_T=1:length(arr_T)
        T=arr_T(i_T);
        [tab_bias, tab_std, tab_rmse, tab_size, tab_len, tab_LFCV] = deal([]);
        f_names_dgp = f_names(contains(f_names,sprintf('N%i_T%i',N,T)));
        for i_kappa_1=1:length(arr_kappa_1)
            for i_kappa_2=1:length(arr_kappa_2)
                f_names_dgp_kappa = f_names_dgp(contains(f_names_dgp,sprintf('kappa%03d_%03d',round([arr_kappa_1(i_kappa_1) arr_kappa_2(i_kappa_2)]*100))));
                if ~isempty(f_names_dgp_kappa)
                    load(['matlogs/' folder_name '/' f_names_dgp_kappa{:}])
                    if isequal(round(out.params.kappa,2), round([arr_kappa_1(i_kappa_1) arr_kappa_2(i_kappa_2)],2))
                        % LS.bias and and LS.rmse each contains three components, which are under
                        %   (i) no correction, (ii) correcting bcorr2 & bcorr3, (iii) correcting
                        %       bcorr1 & bcorr2 & bcorr3 in LS_factor.m
                        % LS.std contains three components, which are under error assumption of
                        %   (i) homoscedasticity, (ii) heteroscedasticity, (iii) serial correlation
                        % LS.stats.rp and LS.stats.length are 3*3*3 arrays, where (2,1,2) represents 
                        %   (i) selecting alpha = 0.05 from [0.10 0.05 0.01]
                        %   (ii) selecting beta estimate with no correction
                        %       from [no_correction correcting_2_and_3 correcting_1_and_2_and_3]
                        %   (iii) selecting se with heteroscedasticity assumption
                        %       from [homoscedasticity  heteroscedasticity  serial_correlation]
                        % H.stats elements all only have one component
                        tab_bias = [tab_bias; out.params.kappa(1), out.params.kappa(2), out.LS.stats.bias(1), out.H.stats.bias(1)];
                        tab_std  = [tab_std;  out.params.kappa(1), out.params.kappa(2), out.LS.stats.std(1), out.H.stats.std(1)];
                        tab_rmse = [tab_rmse; out.params.kappa(1), out.params.kappa(2), out.LS.stats.rmse(1), out.H.stats.rmse(1)];
                        tab_size = [tab_size; out.params.kappa(1), out.params.kappa(2), out.LS.stats.rp(2,1,2)*100, out.H.stats.size(1)*100];
                        tab_len  = [tab_len;  out.params.kappa(1), out.params.kappa(2), out.LS.stats.length(2,1,2), out.H.stats.length(1)];
                        tab_LFCV = [tab_LFCV; out.params.kappa(1), out.params.kappa(2), mean(2*out.LS.se(:,1,2)*LF_cv_LS{i_N,i_T}(2,1,2)), mean(2*out.H.se(:,1)*LF_cv{i_N,i_T}(2,1))];
                    end
                end
            end
        end
    end
end
% Construct Table 2 and 3
rowNames = unique(tab_bias(:,1));
colNames = unique(tab_bias(:,2));
[tab_ls_bias, tab_h_bias, tab_ls_std, tab_h_std, tab_ls_rmse, tab_h_rmse] = deal(NaN(length(rowNames), length(colNames)));
[tab_ls_size, tab_h_size, tab_ls_len, tab_h_len, tab_ls_LFCV, tab_h_LFCV] = deal(NaN(length(rowNames), length(colNames)));
for i = 1:size(tab_bias, 1)
    rowIdx = find(rowNames == tab_bias(i, 1));
    colIdx = find(colNames == tab_bias(i, 2));
    tab_ls_bias(rowIdx, colIdx) = tab_bias(i, 3);
    tab_ls_std(rowIdx, colIdx)  = tab_std(i, 3);
    tab_ls_rmse(rowIdx, colIdx) = tab_rmse(i, 3);
    tab_h_bias(rowIdx, colIdx)  = tab_bias(i, 4);
    tab_h_std(rowIdx, colIdx)   = tab_std(i, 4);
    tab_h_rmse(rowIdx, colIdx)  = tab_rmse(i, 4);
    tab_ls_size(rowIdx, colIdx) = tab_size(i, 3);
    tab_ls_len(rowIdx, colIdx)  = tab_len(i, 3);
    tab_ls_LFCV(rowIdx, colIdx) = tab_LFCV(i, 3);
    tab_h_size(rowIdx, colIdx)  = tab_size(i, 4);
    tab_h_len(rowIdx, colIdx)   = tab_len(i, 4);
    tab_h_LFCV(rowIdx, colIdx)  = tab_LFCV(i, 4);
end
tab_ls_bias = array2table(tab_ls_bias, 'RowNames', arrayfun(@(n) ['bias (kappa_1=' num2str(n) ')'], rowNames, 'UniformOutput', false), 'VariableNames', arrayfun(@(n) ['LS (kappa_2=' num2str(n) ')'], colNames, 'UniformOutput', false));
tab_ls_std  = array2table(tab_ls_std, 'RowNames', arrayfun(@(n) ['std (kappa_1=' num2str(n) ')'], rowNames, 'UniformOutput', false), 'VariableNames', arrayfun(@(n) ['LS (kappa_2=' num2str(n) ')'], colNames, 'UniformOutput', false));
tab_ls_rmse = array2table(tab_ls_rmse, 'RowNames', arrayfun(@(n) ['rmse (kappa_1=' num2str(n) ')'], rowNames, 'UniformOutput', false), 'VariableNames', arrayfun(@(n) ['LS (kappa_2=' num2str(n) ')'], colNames, 'UniformOutput', false));
tab_h_bias  = array2table(tab_h_bias, 'RowNames', arrayfun(@(n) ['bias (kappa_1=' num2str(n) ')'], rowNames, 'UniformOutput', false), 'VariableNames', arrayfun(@(n) ['H (kappa_2=' num2str(n) ')'], colNames, 'UniformOutput', false));
tab_h_std   = array2table(tab_h_std, 'RowNames', arrayfun(@(n) ['std (kappa_1=' num2str(n) ')'], rowNames, 'UniformOutput', false), 'VariableNames', arrayfun(@(n) ['H (kappa_2=' num2str(n) ')'], colNames, 'UniformOutput', false));
tab_h_rmse  = array2table(tab_h_rmse, 'RowNames', arrayfun(@(n) ['rmse (kappa_1=' num2str(n) ')'], rowNames, 'UniformOutput', false), 'VariableNames', arrayfun(@(n) ['H (kappa_2=' num2str(n) ')'], colNames, 'UniformOutput', false));
tab2 = vertcat(horzcat(tab_ls_bias, tab_h_bias), horzcat(tab_ls_std, tab_h_std), horzcat(tab_ls_rmse, tab_h_rmse));
tab_ls_size = array2table(tab_ls_size, 'RowNames', arrayfun(@(n) ['size (kappa_1=' num2str(n) ')'], rowNames, 'UniformOutput', false), 'VariableNames', arrayfun(@(n) ['LS (kappa_2=' num2str(n) ')'], colNames, 'UniformOutput', false));
tab_ls_len  = array2table(tab_ls_len, 'RowNames', arrayfun(@(n) ['len (kappa_1=' num2str(n) ')'], rowNames, 'UniformOutput', false), 'VariableNames', arrayfun(@(n) ['LS (kappa_2=' num2str(n) ')'], colNames, 'UniformOutput', false));
tab_ls_LFCV = array2table(tab_ls_LFCV, 'RowNames', arrayfun(@(n) ['LFCV (kappa_1=' num2str(n) ')'], rowNames, 'UniformOutput', false), 'VariableNames', arrayfun(@(n) ['LS (kappa_2=' num2str(n) ')'], colNames, 'UniformOutput', false));
tab_h_size  = array2table(tab_h_size, 'RowNames', arrayfun(@(n) ['size (kappa_1=' num2str(n) ')'], rowNames, 'UniformOutput', false), 'VariableNames', arrayfun(@(n) ['H (kappa_2=' num2str(n) ')'], colNames, 'UniformOutput', false));
tab_h_len   = array2table(tab_h_len, 'RowNames', arrayfun(@(n) ['len (kappa_1=' num2str(n) ')'], rowNames, 'UniformOutput', false), 'VariableNames', arrayfun(@(n) ['H (kappa_2=' num2str(n) ')'], colNames, 'UniformOutput', false));
tab_h_LFCV  = array2table(tab_h_LFCV, 'RowNames', arrayfun(@(n) ['LFCV (kappa_1=' num2str(n) ')'], rowNames, 'UniformOutput', false), 'VariableNames', arrayfun(@(n) ['H (kappa_2=' num2str(n) ')'], colNames, 'UniformOutput', false));
tab3 = vertcat(horzcat(tab_ls_size, tab_h_size), horzcat(tab_ls_len, tab_h_len), horzcat(tab_ls_LFCV, tab_h_LFCV));
% Output table
writetable(tab2, 'tables/table2.csv', 'WriteRowNames', true);
writetable(tab3, 'tables/table3.csv', 'WriteRowNames', true);